* Open E&P Dataset

* Label variables

* Nearby protest counts. Each variable gets its own label; the two are
* entered in separate models further down.
label var CERCANOVIOLENTA "Nearby nonviolent protests"
label var CERCAVIOLENTA "Nearby violent protests"

* Rescale the source shares by 100 so they read as percentages.
* The two lists are parallel: word i of `sources' is rescaled into word i
* of `newvars'.
local newvars part2020 part2017 rechazo prural pobreza pinera
local sources PART_2020 PART_2017 P_RECHAZO AL ISOC001_IndicedePobrezaCASEN PIERA_2VUELTA
forvalues i = 1/6 {
    local new : word `i' of `newvars'
    local src : word `i' of `sources'
    generate `new' = `src' * 100
}

* Mean years of schooling is copied as is (no rescaling).
generate educacion = EDUC_MEAN

* Labels for the derived variables (part2020 is labelled later in the file).
label var part2017 "Turnout in 2017"
label var rechazo "Reject vote (%)"
label var prural "Rural population (%)"
label var pobreza "District poverty (%)"
label var pinera "Vote for Piñera in 2017 (%)"
label var educacion "District years of schooling (mean)"

* Population: copy the 2019 communal population estimate under a short name
* and take its natural logarithm.
generate population = ITPC2019_PoblacinComunalEstim
generate lnpob = ln(population)
label var lnpob "Population in 2019 (log)"

* Labels for the district-level means taken straight from the source data.
label var EDUC_MEAN "District years of schooling (mean)"
label var EDAD_MEAN "District age (mean)"

* COVID-19 indicators expressed per 1,000 inhabitants.
* Each count is divided by the 2019 communal population estimate and then
* multiplied by 1000.

* cumulative cases per 1K inhabitants
gen casoshab = TOTAL_CASOS_COMUNA / ITPC2019_PoblacinComunalEstim * 1000
label variable casoshab "Cumulative number of COVID-19 cases/1K inhabitants"

* deaths per 1K inhabitants
gen muertoshab = MUERTOS_TOTAL_COMUNA / ITPC2019_PoblacinComunalEstim * 1000
label variable muertoshab "COVID-19 deaths/1K inhabitants"

* new weekly cases per 1K inhabitants (source variable NUEVOS_CASOS_S43)
gen nuevoshab = NUEVOS_CASOS_S43 / ITPC2019_PoblacinComunalEstim * 1000
label variable nuevoshab "Weekly number of new COVID-19 cases/1K inhabitants"

* Value labels for the step-by-step reopening phase (FASECODE, codes 1-4).
label define FASECODEl ///
    1 "Quarantine (reference)" ///
    2 "Transition" ///
    3 "Preparation" ///
    4 "Initial opening"
label values FASECODE FASECODEl

* Change in turnout, computed as 2020 turnout minus 2017 turnout
* (positive values mean turnout rose in 2020).
generate difpart = part2020 - part2017
label var difpart "Difference in turnout (2017-2020)"

label var part2020 "Turnout in 2020"

* 2020 turnout against the 2017 Piñera vote: labelled scatter overlaid with a
* linear fit and its confidence band.
twoway ///
    (scatter part2020 pinera, mlabel(COMUNA) mlabsize(tiny)) ///
    (lfitci part2020 pinera), ///
    name(pinera2017_turnout2020, replace)

* Descriptive statistics for every variable used in the models.
* NOTE(review): univar is not a built-in command; presumably installed from SSC.
univar part2020 difpart pinera ///
    CERCANOVIOLENTA CERCAVIOLENTA ///
    casoshab muertoshab nuevoshab FASECODE ///
    EDAD_MEAN lnpob pobreza educacion prural part2017

* Descriptive scatterplots of each dependent variable against the key
* predictors. Graphs are kept in memory under names ending in _t (turnout
* in 2020) or _d (difference in turnout) and combined at the end.

* marker options shared by every scatterplot in this section
local mk mlabel(COMUNA) msize(tiny) mlabsize(tiny)

* --- turnout in 2020 ---
* protest
scatter part2020 CERCANOVIOLENTA, `mk' name(nonviolent_t, replace)
scatter part2020 CERCAVIOLENTA, `mk' name(violent_t, replace)
graph combine nonviolent_t violent_t
* pandemic indicators, then Piñera vote (graph name = variable name + _t)
foreach x in casoshab muertoshab nuevoshab FASECODE pinera {
    scatter part2020 `x', `mk' name(`x'_t, replace)
}

* --- difference in turnout ---
* protest
scatter difpart CERCANOVIOLENTA, `mk' name(nonviolent_d, replace)
scatter difpart CERCAVIOLENTA, `mk' name(violent_d, replace)
* pandemic indicators, then Piñera vote (graph name = variable name + _d)
foreach x in casoshab muertoshab nuevoshab FASECODE pinera {
    scatter difpart `x', `mk' name(`x'_d, replace)
}

* --- combined panels: turnout graphs first, difference graphs second ---
* protest
graph combine nonviolent_t violent_t nonviolent_d violent_d
* pandemic
graph combine casoshab_t muertoshab_t casoshab_d muertoshab_d
graph combine nuevoshab_t FASECODE_t nuevoshab_d FASECODE_d
* Piñera
graph combine pinera_t pinera_d

* Main OLS models with robust standard errors (Table A1).
* Each model interacts a nearby-protest count with the 2017 Piñera vote.
* After each fit: the estimates are stored (r1-r4) for the coefficient plot,
* testparm runs a joint Wald test of both main effects and their interaction,
* and the results are written to TableA1.xls.
* The first outreg2 call uses replace so that re-running the script starts
* the table afresh instead of adding columns to a previous run's file.

* DV: turnout 2020 (nonviolent)
* interaction
reg part2020 c.CERCANOVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017, vce(rob)
estimates store r1
testparm c.pinera##c.CERCANOVIOLENTA
outreg2 using TableA1.xls, replace ctitle(Turnout 2020 (nonviolent protests))
* DV: turnout 2020 (violent)
* interaction
reg part2020 c.CERCAVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017, vce(rob)
estimates store r2
testparm c.pinera##c.CERCAVIOLENTA
outreg2 using TableA1.xls, append ctitle(Turnout 2020 (violent protests))

* The difference models omit part2017 from the controls (it is already part
* of the dependent variable).
* DV: difference 2017-2020 (nonviolent)
* interaction
reg difpart c.CERCANOVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
estimates store r3
testparm c.pinera##c.CERCANOVIOLENTA
outreg2 using TableA1.xls, append ctitle(Difference 2017-2020 (nonviolent protest))
* DV: difference 2017-2020 (violent)
* interaction
reg difpart c.CERCAVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
estimates store r4
* test the terms that are actually in this model (pinera, not rechazo)
testparm c.pinera##c.CERCAVIOLENTA
outreg2 using TableA1.xls, append ctitle(Difference 2017-2020 (violent protests))

* Coefficient plot of the four stored main models (r1-r4).
* Left panel: turnout in 2020; right panel: difference 2017-2020. Within each
* panel the nonviolent model uses square markers and the violent model
* triangles. The constant is dropped and a reference line is drawn at zero.
coefplot ///
    (r1, label(nonviolent) msymbol(S)) ///
    (r2, label(violent) msymbol(T)), bylabel(Turnout in 2020) ///
    || (r3, label(nonviolent) msymbol(S)) ///
       (r4, label(violent) msymbol(T)), bylabel(Difference 2017-2020) ///
    ||, xline(0) drop(_cons) xsize(10) baselevels byopts(xrescale)

* Robustness check 1 - turnout in 2020 as a proportion.
* GLM with binomial family and logit link, robust standard errors.
* The first outreg2 call starts TableA2.xls (replace); later ones append.

* Rescale the dependent variable so that it varies between 0 and 1.
generate ppart2020 = part2020 / 100

* controls shared by both GLM specifications
local ctrl c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017

* nonviolent protests
glm ppart2020 c.pinera##c.CERCANOVIOLENTA `ctrl', family(binomial) link(logit) vce(robust)
outreg2 using TableA2.xls, replace ctitle(GLM (nonviolent))
estimates store r5

* violent protests
glm ppart2020 c.pinera##c.CERCAVIOLENTA `ctrl', family(binomial) link(logit) vce(robust)
outreg2 using TableA2.xls, append ctitle(GLM (violent))
estimates store r6

* Robustness check 2 - turnout in 2020, sample split at the median
* population. The cutoff 19,688 is hard-coded (stated in the original
* comment to be the median); communes exactly at the cutoff fall in the
* upper group.
local cutoff 19688
local ctrl c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017

* below the median, nonviolent protests
regress part2020 c.CERCANOVIOLENTA##c.pinera `ctrl' if population < `cutoff', vce(robust)
outreg2 using TableA2.xls, append ctitle(Less than median (nonviolent))
estimates store r7

* below the median, violent protests
regress part2020 c.CERCAVIOLENTA##c.pinera `ctrl' if population < `cutoff', vce(robust)
outreg2 using TableA2.xls, append ctitle(Less than median (violent))
estimates store r8

* at or above the median, nonviolent protests
regress part2020 c.CERCANOVIOLENTA##c.pinera `ctrl' if population >= `cutoff', vce(robust)
outreg2 using TableA2.xls, append ctitle(Equal or more than median (nonviolent))
estimates store r9

* at or above the median, violent protests
regress part2020 c.CERCAVIOLENTA##c.pinera `ctrl' if population >= `cutoff', vce(robust)
outreg2 using TableA2.xls, append ctitle(Equal or more than median (violent))
estimates store r10

* Robustness check 3 - outlier diagnostics for turnout in 2020.
* The models are refitted by plain OLS (no vce() option) and followed by a
* leverage-versus-squared-residual plot labelled with commune names.
local ctrl c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017

* nonviolent protests
regress part2020 c.CERCANOVIOLENTA##c.pinera `ctrl'
lvr2plot, mlabel(COMUNA) name(lvr2plot_nonviolent_turnout, replace)

* violent protests
regress part2020 c.CERCAVIOLENTA##c.pinera `ctrl'
lvr2plot, mlabel(COMUNA) name(lvr2plot_violent_turnout, replace)
* Flag the communes singled out as the most important outliers in both
* leverage plots (nonviolent and violent models).
* outlier = 1 for the listed IDSINIM codes and 0 for every other commune.
* NOTE(review): commune names follow the official commune codes
* (1405 = Pica, 8314 = Alto Biobío); confirm against the dataset.
generate outlier = inlist(IDSINIM, ///
    12303, /// Timaukel
    13132, /// Vitacura
    2202,  /// Ollague
    12402, /// Torres del Paine
    13123, /// Providencia
    2302,  /// María Elena
    15101, /// Arica
    13101, /// Santiago
    8314,  /// Alto Biobío
    1405,  /// Pica
    10306, /// San Juan de la Costa
    10203, /// Chonchi
    5104,  /// Juan Fernández
    13108) // Independencia
* Turnout in 2020 re-estimated on communes not flagged as outliers
* (outlier==0), OLS with robust standard errors, appended to TableA2.xls.
local ctrl c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017

* nonviolent protests
regress part2020 c.CERCANOVIOLENTA##c.pinera `ctrl' if outlier == 0, vce(robust)
estimates store r11
outreg2 using TableA2.xls, append ctitle(No outliers (nonviolent))

* violent protests
regress part2020 c.CERCAVIOLENTA##c.pinera `ctrl' if outlier == 0, vce(robust)
outreg2 using TableA2.xls, append ctitle(No outliers (violent))
estimates store r12

* Robustness checks for the difference in turnout (Table A3).
* By population: sample split at the hard-coded median (19,688); communes
* exactly at the cutoff fall in the upper group.
* OLS with robust standard errors; part2017 is not among the controls.
* The first outreg2 call uses replace so that re-running the script starts
* TableA3.xls afresh instead of adding columns to a previous run's file.

* Sample: population smaller than the median (nonviolent)
reg difpart c.CERCANOVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural if population<19688, vce(rob)
outreg2 using TableA3.xls, replace ctitle(Less than median (nonviolent))
estimates store r13
* Sample: population smaller than the median (violent)
reg difpart c.CERCAVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural if population<19688, vce(rob)
outreg2 using TableA3.xls, append ctitle(Less than median (violent))
estimates store r14
* Sample: population equal to or larger than the median (nonviolent)
reg difpart c.CERCANOVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural if population>=19688, vce(rob)
outreg2 using TableA3.xls, append ctitle(Equal or more than median (nonviolent))
estimates store r15
* Sample: population equal to or larger than the median (violent)
reg difpart c.CERCAVIOLENTA##c.pinera c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural if population>=19688, vce(rob)
outreg2 using TableA3.xls, append ctitle(Equal or more than median (violent))
estimates store r16

* Outlier diagnostics and no-outlier models for the difference in turnout.
local ctrl c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural

* Plain OLS of difpart (no vce() option) followed by leverage plots.
* nonviolent protests
regress difpart c.CERCANOVIOLENTA##c.pinera `ctrl'
lvr2plot, mlabel(COMUNA) name(lvr2plot_nonviolent_difpart, replace)
* violent protests
regress difpart c.CERCAVIOLENTA##c.pinera `ctrl'
lvr2plot, mlabel(COMUNA) name(lvr2plot_violent_difpart, replace)

* The most important outliers are the same as for turnout, so the existing
* outlier flag is reused. Robust SEs; results appended to TableA3.xls.
* nonviolent protests
regress difpart c.CERCANOVIOLENTA##c.pinera `ctrl' if outlier == 0, vce(robust)
estimates store r17
outreg2 using TableA3.xls, append ctitle(No outliers (nonviolent))
* violent protests
regress difpart c.CERCAVIOLENTA##c.pinera `ctrl' if outlier == 0, vce(robust)
outreg2 using TableA3.xls, append ctitle(No outliers (violent))
estimates store r18

* Turnout in 2017 regressed on the control variables only (Table A4),
* OLS with robust standard errors. Per the original note, the effect of
* age is positive in this model.
* The c. operator must be attached directly to its variable name.
reg part2017 c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
outreg2 using TableA4.xls, replace ctitle(turnout in 2017)


* Predicted turnout in 2020.
* Every margins call uses post, which overwrites the stored estimation
* results, so the regression is refitted before each one. All other
* covariates are held at their means (atmeans).
* pinera is evaluated at 44.19, 55.9 and 65.23 (described in the original
* comment as percentiles 10, 50 and 90 - TODO confirm of which variable).
local ctrl c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017
local pin pinera=(44.19 55.9 65.23)

* pred1: nonviolent protests by Piñera vote
regress part2020 c.pinera##c.CERCANOVIOLENTA `ctrl', vce(robust)
margins, at(CERCANOVIOLENTA=(0(10)241) `pin') atmeans post
marginsplot, name(pred1, replace)

* pred2: violent protests by Piñera vote
regress part2020 c.pinera##c.CERCAVIOLENTA `ctrl', vce(robust)
margins, at(CERCAVIOLENTA=(0(5)109) `pin') atmeans post
marginsplot, name(pred2, replace)

* pred3: deaths per 1K inhabitants
regress part2020 c.pinera##c.CERCANOVIOLENTA `ctrl', vce(robust)
margins, at(muertoshab=(0(0.2)2.238)) atmeans post
marginsplot, name(pred3, replace)

* pred4: weekly new cases per 1K inhabitants
regress part2020 c.pinera##c.CERCANOVIOLENTA `ctrl', vce(robust)
margins, at(nuevoshab=(0(2)21.2)) atmeans post
marginsplot, name(pred4, replace)

* pred5: step-by-step phase
regress part2020 c.pinera##c.CERCANOVIOLENTA `ctrl', vce(robust)
margins, at(FASECODE=(1 2 3 4)) atmeans post
marginsplot, name(pred5, replace)

* Predicted difference in turnout (2020 minus 2017).
* Every margins call uses post, which overwrites the stored estimation
* results, so the regression is refitted before each one. All other
* covariates are held at their means (atmeans).
* All five models use the same specification as the main difference model
* (r3): part2017 is NOT a control, because it is already part of difpart.
* Graph names skip pred7; there is no cumulative-cases plot for turnout.

* nonviolent
reg difpart c.pinera##c.CERCANOVIOLENTA c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
margins, at(CERCANOVIOLENTA=(0(10)241) pinera =(44.19 55.9 65.23)) atmeans post
marginsplot, name(pred6, replace)
* cumulative number of cases
reg difpart c.pinera##c.CERCANOVIOLENTA c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
margins, at(casoshab =(0(5)87)) atmeans post
marginsplot, name(pred8, replace)
* deaths
reg difpart c.pinera##c.CERCANOVIOLENTA c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
margins, at(muertoshab =(0(0.2)2.238)) atmeans post
marginsplot, name(pred9, replace)
* weekly new cases
reg difpart c.pinera##c.CERCANOVIOLENTA c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
margins, at(nuevoshab =(0(2)21.2)) atmeans post
marginsplot, name(pred10, replace)
* step by step
reg difpart c.pinera##c.CERCANOVIOLENTA c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural, vce(rob)
margins, at(FASECODE =(1 2 3 4)) atmeans post
marginsplot, name(pred11, replace)

* Combined figures of the predicted-value plots, each sharing one legend.
* NOTE(review): grc1leg2 is not a built-in command; presumably user-installed.

* nonviolent protests: turnout (pred1) next to difference (pred6)
local g_nonviolent pred1 pred6
grc1leg2 `g_nonviolent'

* violent protests are plotted for turnout only (pred2), so nothing is
* combined here

* pandemic: cumulative cases, deaths and weekly new cases
local g_pandemic pred8 pred3 pred9 pred4 pred10
grc1leg2 `g_pandemic'

* step-by-step phase: turnout (pred5) next to difference (pred11)
local g_phase pred5 pred11
grc1leg2 `g_phase'

* DV: share of the reject vote (Table A5).
* OLS with robust standard errors, same right-hand side as the turnout
* models. Both columns go to the same file, TableA5.xls: the nonviolent
* model starts it (replace) and the violent model is appended.
* Predicted values are then plotted over the protest count at three values
* of pinera, with the other covariates at their means.

* nonviolent
reg rechazo c.pinera##c.CERCANOVIOLENTA c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017, vce(rob)
outreg2 using TableA5.xls, replace ctitle(Nonviolent)
margins, at(CERCANOVIOLENTA=(0(10)241) pinera =(44.19 55.9 65.23)) atmeans post
marginsplot, name(pred_rechazo_nonviol, replace)
* violent
reg rechazo c.pinera##c.CERCAVIOLENTA c.casoshab c.muertoshab c.nuevoshab i.FASECODE c.EDAD_MEAN c.lnpob c.pobreza c.educacion c.prural c.part2017, vce(rob)
outreg2 using TableA5.xls, append ctitle(Violent)
margins, at(CERCAVIOLENTA=(0(5)109) pinera =(44.19 55.9 65.23)) atmeans post
marginsplot, name(pred_rechazo_viol, replace)
* both panels side by side with a single legend
grc1leg2 pred_rechazo_nonviol pred_rechazo_viol

